miχpods: MOM6
Contents
miχpods: MOM6#
test out hvplot
build LES catalog
bootstrap error on mean, median?
Try daily vs hourly
add TAO χpods
fix EUC max at 110
Questions:
N2 vs N2T
match time intervals
match vertical resolution
restrict TAO S2 to ADCP depth range
restrict to top 200m.
References#
Warner & Moum (2019)
Setup#
# Setup cell: load the watermark extension, import dependencies, and set
# global holoviews/dask/matplotlib/xarray options for the whole notebook.
%load_ext watermark
import datetime
import glob
import os
import cf_xarray
import dask
import dcpy
import distributed
import flox.xarray
import holoviews as hv
import hvplot.xarray
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import xgcm
import pump
from pump import mixpods
# Use the bokeh backend for interactive holoviews/hvplot figures.
hv.notebook_extension('bokeh')
# Accept potentially large chunks when slicing instead of warning/splitting.
dask.config.set({"array.slicing.split_large_chunks": False})
plt.rcParams["figure.dpi"] = 140  # higher-resolution inline figures
plt.style.use("bmh")
# Propagate attrs through operations; collapse the data repr in HTML output.
xr.set_options(keep_attrs=True, display_expand_data=False)
gcmdir = "/glade/campaign/cgd/oce/people/bachman/TPOS_1_20_20_year/OUTPUT/" # MITgcm output directory
stationdirname = gcmdir  # station files are read from the same directory
# Print versions of all imported packages for reproducibility.
%watermark -iv
The watermark extension is already loaded. To reload it, use:
%reload_ext watermark
hvplot : 0.8.0
cf_xarray : 0.7.4
ncar_jobqueue: 2021.4.14
ipywidgets : 7.7.1
dask_jobqueue: 0.7.3
xgcm : 0.6.0
json : 2.0.9
dask : 2022.7.0
matplotlib : 3.5.2
distributed : 2022.7.0
pandas : 1.4.3
dcpy : 0.1.dev385+g121534c
xarray : 2022.6.0rc0
numpy : 1.22.4
sys : 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:04:59) [GCC 10.3.0]
pump : 0.1
flox : 0.5.9
# Dask-on-PBS setup for NCAR Casper: tear down any client/cluster surviving
# from a previous execution of this cell, then start a fresh adaptive
# PBSCluster and connect a distributed Client to it.
import ncar_jobqueue
# Close and delete a pre-existing client so re-running doesn't leak connections.
if "client" in locals():
    client.close()
    del client
if "cluster" in locals():
    cluster.close()
#env = {"OMP_NUM_THREADS": "3", "NUMBA_NUM_THREADS": "3"}
# cluster = distributed.LocalCluster(
#     n_workers=8,
#     threads_per_worker=1,
#     env=env
# )
if "cluster" in locals():
    del cluster
#cluster = ncar_jobqueue.NCARCluster(
#    project="NCGD0011",
#    scheduler_options=dict(dashboard_address=":9797"),
#)
# cluster = dask_jobqueue.PBSCluster(
#     cores=9, processes=9, memory="108GB", walltime="02:00:00", project="NCGD0043",
#     env_extra=env,
# )
import dask_jobqueue
# One single-core worker process per PBS job.
cluster = dask_jobqueue.PBSCluster(
    cores=1,  # The number of cores you want
    memory='23GB',  # Amount of memory
    processes=1,  # How many processes
    queue='casper',  # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='$TMPDIR',  # Use your local directory
    resource_spec='select=1:ncpus=1:mem=23GB',  # Specify resources
    project='ncgd0011',  # Input your project ID here
    walltime='02:00:00',  # Amount of wall time
    interface='ib0',  # Interface to use
)
# Scale between 2 and 12 jobs automatically depending on load.
cluster.adapt(minimum_jobs=2, maximum_jobs=12)
client = distributed.Client(cluster)
client
Client
Client-5bc08d4d-11b7-11ed-94b4-3cecef1b12bc
| Connection method: Cluster object | Cluster type: dask_jobqueue.PBSCluster |
| Dashboard: https://jupyterhub.hpc.ucar.edu/dev/user/dcherian/proxy/8787/status |
Cluster Info
PBSCluster
414f1f1e
| Dashboard: https://jupyterhub.hpc.ucar.edu/dev/user/dcherian/proxy/8787/status | Workers: 0 |
| Total threads: 0 | Total memory: 0 B |
Scheduler Info
Scheduler
Scheduler-967ab8bc-d7cc-4f5c-9367-619b430baf67
| Comm: tcp://10.12.206.55:35183 | Workers: 0 |
| Dashboard: https://jupyterhub.hpc.ucar.edu/dev/user/dcherian/proxy/8787/status | Total threads: 0 |
| Started: Just now | Total memory: 0 B |
Workers
# Route dask computations through the distributed client by default.
dask.config.set(scheduler=client)
<dask.config.set at 0x2b150da735b0>
Read data#
TAO#
# Open the TAO gridded ancillary dataset (zarr) and subset to 140°W,
# June 2005 through the end of 2015.
zarr_path = os.path.expanduser("~/work/pump/zarrs/tao-gridded-ancillary.zarr")
tao_full = xr.open_dataset(zarr_path, chunks="auto", engine="zarr")
tao_gridded = tao_full.sel(longitude=-140, time=slice("2005-Jun", "2015"))

# Mark the vertical axis explicitly so cf-xarray can identify it.
tao_gridded["depth"].attrs["axis"] = "Z"

# EUC maximum from the zonal velocity field. (eucmax exists)
u_bare = tao_gridded.u.reset_coords(drop=True)
tao_gridded.coords["eucmax"] = pump.calc.get_euc_max(u_bare, kind="data")
#pump.calc.calc_reduced_shear(tao_gridded)

# ENSO transition labels, nearest-matched onto the TAO time axis.
transition = pump.obs.make_enso_transition_mask()
tao_gridded.coords["enso_transition"] = transition.reindex(time=tao_gridded.time, method="nearest")

# Quick-look plots of zonal velocity and the EUC-max depth.
tao_gridded.u.cf.plot()
tao_gridded.eucmax.plot()
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "depth" starting at index 58. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "time" starting at index 139586. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "longitude" starting at index 2. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
[<matplotlib.lines.Line2D at 0x2b150d367d30>]
# Joint N²-S² histogram for TAO: keep only S2 and N2T, drop coordinates the
# helper does not need, and rename N2T -> N2 before computing the PDF.
n2s2_input = (
    tao_gridded[["S2", "N2T"]]
    .drop_vars(["shallowest", "zeuc"])
    .rename_vars({"N2T": "N2"})
)
tao_gridded = tao_gridded.update({"n2s2pdf": mixpods.pdf_N2S2(n2s2_input).load()})

# Oceanic Niño Index and the derived ENSO-transition mask as coordinates.
tao_gridded.coords["oni"] = pump.obs.process_oni()
tao_gridded.coords["enso_transition_mask"] = mixpods.make_enso_transition_mask(tao_gridded.oni)

# Precomputed hourly Richardson-number seasonal percentiles for TAO.
tao_Ri = xr.load_dataarray("tao-hourly-Ri-seasonal-percentiles.nc").cf.guess_coord_axis()
MITgcm stations#
# Read 1/20° MITgcm station output and subset to the equatorial moorings.
metrics = pump.model.read_metrics(stationdirname)
stations = pump.model.read_stations_20(stationdirname)
# Nearest model stations to four equatorial TAO mooring longitudes.
gcmeq = stations.sel(latitude=0, longitude=[-155.025, -140.025, -125.025, -110.025], method="nearest")
#enso = pump.obs.make_enso_mask()
#mitgcm["enso"] = enso.reindex(time=mitgcm.time.data, method="nearest")
gcmeq["eucmax"] = pump.calc.get_euc_max(gcmeq.u)
# NOTE(review): return value discarded — presumably calc_reduced_shear
# modifies `gcmeq` in place (the "calc uz/vz/S2/N2/..." messages below come
# from this call); confirm, otherwise this line is a no-op.
pump.calc.calc_reduced_shear(gcmeq)
oni = pump.obs.process_oni()
# ENSO transition labels, nearest-matched onto the model time axis.
gcmeq["enso_transition"] = mixpods.make_enso_transition_mask(oni).reindex(time=gcmeq.time.data, method="nearest")
# Focus on the 140°W station for comparison against TAO.
mitgcm = gcmeq.sel(longitude=-140.025, method="nearest")
#metrics_ = xr.align(metrics, mitgcm.expand_dims(["latitude", "longitude"]), join="inner")[0].squeeze()
# Joint N²-S² histogram for the model, computed on the distributed client.
mitgcm = mitgcm.update({"n2s2pdf": mixpods.pdf_N2S2(mitgcm).load(scheduler=client)})
calc uz
calc vz
calc S2
calc N2
calc shred2
Calc Ri
# Interactive time series of EUC-max depth, one line per longitude.
gcmeq.eucmax.hvplot.line(x="time", by="longitude")